####################################################
# ANALYZENETW.R
#
# This file contains several functions to analyze a given network
#
# Functions:
#   network.stats             compute network statistics
#   pareto.plot			draw pareto plot of distribution of some vector
#   pareto.plot.degree        draw pareto plot of degree distribution of network
#   plot.degree.clustering    draw plot of clustering vs. degree
#   plot.degree.assortativity draw plot of degree vs. neighboring degree
#   get.giant.component       extracts the giant component from the network
#   get.connected.triplelist  lists all connected triples A->B->C in graph
#   transitive.triples        compute the fraction of transitive triples
#
# Author: Marco van der Leij
#
#####################################################

library(igraph)

######################################################
# network.stats(g, compute.distances=FALSE)
#
# This function computes summary statistics of the network,
# namely: number of authors, average degree, std.dev. degree,
# size of giant component, percentage size of giant component,
# size of second largest component, number of isolated authors,
# percentage of isolated authors, total clustering coefficient,
# average clustering coefficient, average distance in giant
# component, std.dev. distance in giant component and diameter
#
# Arguments:
#   g                  igraph object
#   compute.distances  whether or not distances should also be computed
#
# Author: M.J. van der Leij
# Date first code: 17/12/2008
#
network.stats <- function(g, compute.distances=FALSE) {

  # check g object
  if (!is.igraph(g)) stop("g is not an igraph object")

  # compute some stats
  n <- vcount(g)                                  # number of nodes
  deg <- degree(g)                                # degrees
  isol <- sum(deg==0)                             # number of isolated nodes
  comps <- clusters(g, mode="weak")               # info on components

  # component sizes
  compsize <- sort(comps$csize, decreasing=TRUE)
  gc <- compsize[1]
  c2 <- compsize[2]

  # degree correlation (assortativity measure)
  es <- get.edges(g, E(g)) + 1
  dc <- cor(deg[es[,1] ], deg[es[,2] ])

  # print information
  cat("Number of nodes:", n, "\n\n")

  cat("Degree\n")
  cat("  Average:", mean(deg), "\n")
  cat("  Std.Dev.:", sd(deg), "\n\n")

  cat("Giant component\n")
  cat("  Size:", gc, "\n")
  cat("  As perc.:", gc/n, "\n")
  cat("Second largest component:", c2, "\n\n")

  cat("Isolated nodes\n")
  cat("  Number:", isol, "\n")
  cat("  As perc.:", isol/n, "\n\n")

  cat("Clustering coefficient\n")
  cat("  Total:", transitivity(g, type="global"), "\n")
  cat("  Average:", mean(transitivity(g, type="local"), na.rm=TRUE), "\n\n")

  cat("Degree correlation:", dc, "\n\n")

  if (compute.distances) {

    # get nodes in giant component
    g1 <- get.giant.component(g)
    
    disthist <- path.length.hist(g1, directed=FALSE)$res
    diameter <- length(disthist)
    avdist <- weighted.mean(1:diameter, disthist)
    sddist <- sqrt(weighted.mean( ((1:diameter)-avdist)^2, disthist))

    cat("Distance in giant component\n")
    cat("  Average:", avdist, "\n")
    cat("  Std.Dev.:", sddist, "\n")
    cat("Diameter:", diameter, "\n")
  }
}

##############################################################
# pareto.plot(x, add=FALSE, ...)
#
# This function creates a Pareto plot of the distribution of an array x
#
# Arguments:
#     x              an array with values
#     add            indicator whether only points should be added to existing plot
#     ...            other arguments to be parsed to plot function
#
# Author: M.J. van der Leij
# Date: 14 May 2010
#
pareto.plot <- function(x, add=FALSE, ...) {

  values <- sort(unique(x))

  # compute tail distribution
  fx <- numeric(length(values))
  for (i in 1:length(values)) fx[i]<-sum(x==values[i])
  fx <- fx/length(x)
  ffx <- cumsum(fx)
  tail <- 1 - ffx + fx

  if (add) points(values, tail,...)
  else {
    plot(values, tail,
      ylab="tail distribution", log="xy", ...)
  }
}

##############################################################
# pareto.plot.degree(g, vids=NULL, mode="all", add=FALSE, ...)
#
# This function creates a Pareto plot of the degree distribution
#
# Arguments:
#     g              igraph network
#     vids           sequence of nodes to be considered
#     mode           "all", "in", or "out" for degree, indegree or outdegree
#     add            indicator whether only points should be added to existing plot
#     ...            other arguments to be parsed to plot function
#
# Author: M.J. van der Leij
# Date: 17 December 2008
#
pareto.plot.degree <- function(g, vids=NULL, mode="all", add=FALSE, ...) {

  # check g object
  if (!is.igraph(g)) stop("g is not an igraph object")
  if (is.null(vids)) vids <- V(g)

  # compute degree
  x <- degree(g, v=vids, mode=mode)

  deglabel <- "degree"
  if (mode == "in")  deglabel <- "in-degree"
  else if (mode == "out")  deglabel <- "out-degree"

  pareto.plot(x, add, xlab=deglabel, ...)
}

#########################################################
# plot.degree.clustering(g, vids=NULL, mode="all", add=FALSE, ...)
#
# This function draws a plot with on the x-axis the degree and
# on the y-axis the average clustering for that degree.
#
# Arguments:
#     g              igraph network
#     vids           sequence of nodes to be considered
#     mode           "all", "in", or "out" for degree, indegree or outdegree
#     add            indicator whether only points should be added to existing plot
#     ...            other arguments to be parsed to plot function
#
# Author: M.J. van der Leij
# Date: 17 December 2008
#
plot.degree.clustering <- function(g, vids=NULL, mode="all", add=FALSE, ...) {

  # check g object
  if (!is.igraph(g)) stop("g is not an igraph object")
  if (is.null(vids)) vids <- V(g)

  # compute degree
  x <- degree(g, v=vids, mode=mode)
  values <- sort(unique(x))

  # compute clustering for given degree
  cl <- transitivity(g, type="local", vids=vids)
  meancl <- numeric(length(values))
  for (i in 1:length(values)) meancl[i]<-mean(cl[x==values[i]])

  deglabel <- "degree"
  if (mode == "in")  deglabel <- "in-degree"
  else if (mode == "out")  deglabel <- "out-degree"

  if (add)  points(values, meancl,...)
  else {
    plot(values, meancl,
    xlab=deglabel, ylab="clustering", log="xy",...)
  }
}
#########################################################
# plot.degree.prod(g, vids=NULL, mode="all", add=FALSE, ...)
#
# This function draws a plot with on the x-axis the degree and
# on the y-axis the average productivity for that degree.
#
# Arguments:
#     g              igraph network
#     vids           sequence of nodes to be considered
#     mode           "all", "in", or "out" for degree, indegree or outdegree
#     add            indicator whether only points should be added to existing plot
#     ...            other arguments to be parsed to plot function
#
# Author: Lorenzo Ductor
# Date: 22 April 2014
#
plot.degree.prod <- function(g, vids=NULL, pr, mode="all", add=FALSE, ...) {

  # check g object
  if (!is.igraph(g)) stop("g is not an igraph object")
  if (is.null(vids)) vids <- V(g)$name

  # compute degree
  x <- degree(g, v=vids, mode=mode)
  values <- sort(unique(x))

  # compute productivity for given degree
  
  meanpr <- numeric(length(values))
  for (i in 1:length(values)) meanpr[i]<-mean(pr[x==values[i]])

  deglabel <- "degree"
  if (mode == "in")  deglabel <- "in-degree"
  else if (mode == "out")  deglabel <- "out-degree"

  if (add)  points(values, meanpr,...)
  else {
    plot(values, meanpr,
    xlab=deglabel, ylab="productivity", log="xy",...)
  }
}


#########################################################
# plot.degree.assortativity(g, eids=NULL, imode="all", jmode="all", add=FALSE, ...)
#
# This function draws a plot with on the x-axis the degree of a node i and
# on the y-axis the average degree of the neighboring nodes (neighboring i).
# Currently this function ignores direction, i.e. it treats the network 
# as undirected.
#
# Arguments:
#     g              igraph network
#     eids           sequence of edges to be considered
#     imode          the degree mode to be considered for the source vertex of the edge
#     jmode          the degree mode to be considered for the destination vertex of the edge
#     add            indicator whether only points should be added to existing plot
#     ...            other arguments to be parsed to plot function
#
# Author: M.J. van der Leij
# Date: 6 February 2009
#
plot.degree.assortativity <- function(g, eids=NULL, imode="all", jmode="all", add=FALSE, ...) {

  # check g object
  if (!is.igraph(g)) stop("g is not an igraph object")
  if (is.null(eids)) eids <- E(g)

  # compute degree
  ideg <- degree(g, mode=imode)
  jdeg <- degree(g, mode=jmode)

  # degree correlation (assortativity measure)
  es <- get.edges(g, E(g)) + 1

  xi <- ideg[es[,1] ]
  xj <- jdeg[es[,2] ]

  values <- sort(unique(xi))

  # compute clustering for given degree
  meanneighx <- numeric(length(values))
  for (i in 1:length(values)) meanneighx[i]<-mean(xj[xi==values[i]])

  ideglabel <- "degree"
  if (imode == "in")  ideglabel <- "in-degree"
  else if (imode == "out")  ideglabel <- "out-degree"

  jdeglabel <- "degree neighbor"
  if (jmode == "in")  jdeglabel <- "in-degree neighbor"
  else if (jmode == "out")  jdeglabel <- "out-degree neighbor"


  if (add)  points(values, meanneighx,...)
  else {
    plot(values, meanneighx,
    xlab=ideglabel, ylab=jdeglabel, log="xy",...)
  }
}

########################################################
# get.giant.component(g)
#
# This function extracts the largest (weak) component from the network
# and returns it as an igraph object.
#
# Arguments:
#     g            igraph network
#
# Author: M.J. van der Leij
# Date: 14 March 2009
#
get.giant.component <- function(g) {

  if (!is.igraph(g)) stop("g is not an igraph object")

  comps <- clusters(g, mode="weak")
  induced.subgraph(g, which(which.max(comps$csize)==comps$membership))
}

##################################
# get.connected.triplelist(g)
#
# This function lists all the connected triples in the network. It
# returns a kx3 matrix with in each row the vertex ids of a connected triple.
# Calling the vertex in the first column A, the second column B, and the 
# third column C, the triple has connections A->B and B->C in case of a
# directed network, and A--B and B--C in case of an undirected network.
#
# Arguments:
#     g            igraph network
#
# Author: M.J. van der Leij
# Date: 14 May 2010
#
get.connected.triplelist <- function(g) {
  if (is.directed(g)) {
    get.vertex.triplelist <- function(v) {
      vinnei <- neighbors(g, v, mode="in")
      voutnei <- neighbors(g, v, mode="out")
      get.v.vin.triplelist <- function(vin, v, voutnei) {
        vouts <- voutnei[voutnei != vin]
        if (length(vouts)>0) rbind(vin, v, vouts)
      }
      if ( length(vinnei)>0 && length(voutnei)>0 ) {
        lapply(vinnei, get.v.vin.triplelist, v, voutnei)
      }
    }
  }
  else {
    get.vertex.triplelist <- function(v) {
      vnei <- neighbors(g, v, mode="all")
      if (length(vnei)>1) {
        lapply(vnei[2:length(vnei)], function(vin) rbind(vin,v,vnei[vnei<vin]))
      }
    }
  }
  gvt <- unlist(lapply(as.integer(V(g)), get.vertex.triplelist))
  t(array(gvt, dim=c(3,length(gvt)/3)))
}

#########################################################
# transitive.triples(g, type="global", vids=NULL)
#
# This function counts the fraction of transitive triples in the network,
# that is, the fraction of triples A->B->C, for which also A->C.
# This is a measure of clustering, specially for directed networks.
#
# Arguments:
#     g              igraph network
#     type           "global": fraction of transitive triples of whole network
#                    "local": fraction of transitive triples for each
#                          vertex in vids
#     vids           vertices for which fraction of transitive triples is
#                      calculated. If vids=NULL then all nodes are considered.
#
# Author: M.J. van der Leij
# Date: 12 February 2009
# Adjusted: 15 August 2011
#
transitive.triples <- function(g, type="global", vids=NULL) {

  if (!is.igraph(g)) stop("g is not an igraph object")
  if (!is.directed(g)) stop("g should be a directed graph")

  if (type=="global") {
    x <- triad.census(g)
    ( x[9]+2*x[12]+2*x[13]+x[14]+3*x[15]+6*x[16] )/( 
         x[6]+x[7]+x[8]+x[9]+3*x[10]+2*x[11]+2*x[12]+2*x[13]+3*x[14]+4*x[15]+6*x[16] )  
  }
  else if (type=="local") {
    # calculate fraction of transitive triples for each vertex in vids
    if (is.null(vids)) vids <- V(g)
    triples <- data.frame(get.connected.triplelist(g))
    names(triples) <- c("v1", "v2", "v3")
    
    vertex.tt <- function(v) {
      v.triples <- subset(triples, v1==v, select=c(v1,v3) )
      if (nrow(v.triples)>0) {
        mean(as.integer(mapply(function(vv1, vv2) are.connected(g, vv1, vv2), 
                      v.triples$v1, v.triples$v3)))
      }
      else NA
    }
    sapply(as.integer(vids), vertex.tt)
  }
  else stop("type argument is not correct: should be global or local")

}